Libraries required for this analysis

# Center all figures in the rendered document.
knitr::opts_chunk$set(fig.align="center") 
library(rstanarm)
library(tidyverse)
library(tidybayes)
library(modelr) 
# NOTE(review): ggplot2 and magrittr are already attached by tidyverse,
# so the next two calls are redundant (harmless, but could be removed).
library(ggplot2)
library(magrittr)  
library(emmeans)
library(bayesplot)
library(brms)
library(gganimate)

# Light ggplot2 theme for every plot in this document.
theme_set(theme_light())

# Project-local helpers used below: posterior_draws_plot() and
# expected_diff_in_mean_plot().
source('helper_functions.R')

In our experiment, we used a visualization recommendation algorithm (composed of one search algorithm and one oracle algorithm) to generate visualizations for the user on one of two datasets. We then measured the user’s time to complete each of four tasks: 1. Find Extremum 2. Retrieve Value 3. Prediction 4. Exploration

Given a search algorithm (bfs or dfs), an oracle (compassql or dziban), and a dataset (birdstrikes or movies), we would like to predict the time it takes the average user to complete each task. In addition, we would like to know if the choice of search algorithm and oracle has any meaningful impact on a user’s completion time for each of these four tasks, and if the participant’s group (student or professional) is associated with a difference in performance.

Read in and clean data

# Read the per-participant completion times and prepare the columns used
# for modeling. The experimental design variables are categorical, and
# "condition" is the combined oracle/search label ("oracle search").
time_data <- read.csv('split_by_participant_groups/completion_time.csv')

time_data <- time_data %>%
  mutate(
    dataset = as.factor(dataset),
    oracle = as.factor(oracle),
    search = as.factor(search),
    task = as.factor(task),
    condition = paste(oracle, search)
  )

# Canonical task order; also drives per-task subsetting and file naming.
task_list <- c(
  "1. Find Extremum",
  "2. Retrieve Value",
  "3. Prediction",
  "4. Exploration"
)

# Fixed RNG seed for reproducible sampling and posterior draws.
seed <- 12

Building a Model for Time Analysis

The weakly informative prior (normal(360.48, 224.40)) was derived from pilot studies and describes the distribution of time (in seconds) needed to complete any given task under any condition. Because our pilot study was small, we chose to aggregate these measurements (rather than deriving separate priors for each task) to minimize the effect of biases.

The lognormal family was selected to prevent our model from predicting completion times of less than zero seconds.

# Hierarchical lognormal model of task completion time:
# - oracle * search: both main effects plus their interaction
# - dataset, task, participant_group: additive fixed effects
# - (1 | participant_id): per-participant random intercept (each participant
#   contributes repeated measures across the four tasks)
# NOTE(review): with family = lognormal(), the Intercept is on the log-seconds
# scale, but normal(360.48, 224.40) is stated in seconds (from the pilot
# study) -- confirm this prior is on the intended scale.
# file = "models/time" caches the fit: brm() loads the saved model if the
# file exists instead of re-sampling (delete the file to refit).
model <- brm(
    formula = bf(
    completion_time ~ oracle * search + dataset + task + participant_group + (1 | participant_id)
  ),
    prior = prior(normal(360.48, 224.40), class = "Intercept"),
    chains = 2,
    cores = 2,
    iter = 2500,
    warmup = 1000,
    data = time_data,
    family = lognormal(),  # keeps predicted completion times positive
    file="models/time",
    seed = seed
  )   
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
## Start sampling

Diagnostics + Model Evaluation

In the summary table, we want to see Rhat values close to 1.0 and Bulk_ESS in the thousands.

# Posterior summary: check Rhat near 1.00 and large Bulk_ESS/Tail_ESS
# for evidence of convergence.
summary(model)
##  Family: lognormal 
##   Links: mu = identity; sigma = identity 
## Formula: completion_time ~ oracle * search + dataset + task + participant_group + (1 | participant_id) 
##    Data: time_data (Number of observations: 288) 
## Samples: 2 chains, each with iter = 2500; warmup = 1000; thin = 1;
##          total post-warmup samples = 3000
## 
## Group-Level Effects: 
## ~participant_id (Number of levels: 72) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.19      0.04     0.11     0.27 1.00      850      783
## 
## Population-Level Effects: 
##                          Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                    5.29      0.09     5.12     5.48 1.00     2514
## oracledziban                 0.03      0.09    -0.16     0.20 1.00     2078
## searchdfs                    0.10      0.09    -0.08     0.27 1.00     1887
## datasetmovies               -0.08      0.07    -0.21     0.05 1.00     2915
## task2.RetrieveValue          0.01      0.07    -0.12     0.14 1.00     3854
## task3.Prediction             1.12      0.07     0.98     1.25 1.00     4287
## task4.Exploration            1.22      0.07     1.09     1.35 1.00     3413
## participant_groupstudent     0.06      0.07    -0.07     0.19 1.00     2846
## oracledziban:searchdfs      -0.11      0.13    -0.37     0.14 1.00     1956
##                          Tail_ESS
## Intercept                    2488
## oracledziban                 2094
## searchdfs                    1917
## datasetmovies                2439
## task2.RetrieveValue          2361
## task3.Prediction             2510
## task4.Exploration            2458
## participant_groupstudent     2506
## oracledziban:searchdfs       2082
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     0.40      0.02     0.37     0.45 1.00     1902     2080
## 
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Trace plots help us check whether there is evidence of non-convergence for model.

# Trace and density plots per parameter; well-mixed ("fuzzy caterpillar")
# traces indicate no obvious non-convergence.
plot(model)

In our pairs plots, we want to make sure we don’t have highly correlated parameters (highly correlated parameters means that our model has difficulty differentiating the effect of such parameters).

# Pairwise posterior scatterplots of the main fixed effects. Strong
# correlations here would mean the model has trouble separating the
# corresponding effects. fixed = TRUE matches the parameter names
# literally rather than as regular expressions.
fixed_effect_pars <- c(
  "b_Intercept",
  "b_datasetmovies",
  "b_oracledziban",
  "b_searchdfs",
  "b_task2.RetrieveValue",
  "b_task3.Prediction",
  "b_task4.Exploration"
)
pairs(model, pars = fixed_effect_pars, fixed = TRUE)

Using draws from the posterior, we can visualize parameter effects and average response. The thicker, shorter line represents the 95% credible interval, while the thinner, longer line represents the 50% credible interval.

(These won’t be outputted in this analysis document, but the plots will be saved to the “../plots/posterior_draws/time/” directory and the interval data to the “../plot_data/posterior_draws/time/” directory)

# Posterior draws of the expected completion time for each condition,
# marginalizing out the participant-level random effects (re_formula = NA).
draw_data <- time_data %>%
  add_fitted_draws(model, seed = seed, re_formula = NA)

# For each task, save a posterior-draws plot (png) and its interval
# summary (csv). Note: inside a for loop, a bare `plot` / `fit_info`
# expression is NOT auto-printed, so the original unassigned lines were
# no-ops and have been removed; `task_plot` also avoids shadowing the
# base plot() generic.
for (task_name in task_list) {
  draw_data_sub <- subset(draw_data, task == task_name)

  task_plot <- posterior_draws_plot(
    draw_data_sub, "dataset", FALSE,
    "Predicted Mean Completion Time (seconds)",
    "Oracle/Search Combination"
  )

  # Derive a file-safe name, e.g. "1. Find Extremum" -> "time_Find_Extremum"
  # (strip everything through the ".", then replace spaces with "_").
  filename <- gsub("^.*\\.", "", task_name)
  filename <- gsub(" ", "_", filename)
  filename <- paste0("time", filename)

  ggsave(
    file = paste0(filename, ".png"),
    plot = task_plot,
    path = "../plots/posterior_draws/time", width = 7, height = 7
  )

  # 95% and 50% mean quantile intervals per condition.
  fit_info <- draw_data_sub %>%
    group_by(search, oracle, dataset) %>%
    mean_qi(.value, .width = c(.95, .5))
  write.csv(
    fit_info,
    paste0("../plot_data/posterior_draws/time/", filename, ".csv"),
    row.names = FALSE
  )
}

Now let’s make a summary plot for the two tasks we care about (1. Find Extremum and 2. Retrieve Value) with predicted mean completion times for each oracle/search combination.

# Restrict to the two value-lookup tasks and relabel the design factors
# with presentation-friendly capitalization; "Dataset" duplicates the
# dataset column under the legend title we want, and "condition" stacks
# oracle/search on two lines for the axis labels.
plot_data <- draw_data %>%
  filter(task %in% c("1. Find Extremum", "2. Retrieve Value")) %>%
  mutate(
    task = factor(task),  # drop unused task levels after subsetting
    oracle = gsub('compassql', 'CompassQL', oracle),
    oracle = gsub('dziban', 'Dziban', oracle),
    search = gsub('bfs', 'BFS', search),
    search = gsub('dfs', 'DFS', search),
    dataset = gsub('birdstrikes', 'Birdstrikes', dataset),
    dataset = gsub('movies', 'Movies', dataset),
    Dataset = dataset,
    condition = paste(oracle, search, sep = '\n')
  )

draw_plot <- posterior_draws_plot(
    plot_data, "Dataset", TRUE,
    "Predicted Mean Completion Time (seconds)",
    "Oracle/Search Combination"
  ) +
  theme(axis.text.y = element_text(size = 12)) +
  scale_alpha(guide = 'none') +
  xlab("Predicted Mean Completion Time (seconds)")

draw_plot

## # A tibble: 32 x 10
## # Groups:   search, oracle, dataset [8]
##    search oracle  dataset  task     .value .lower .upper .width .point .interval
##    <chr>  <chr>   <chr>    <fct>     <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
##  1 BFS    Compas… Birdstr… 1. Find…   224.   186.   269.   0.95 mean   qi       
##  2 BFS    Compas… Birdstr… 2. Retr…   226.   187.   272.   0.95 mean   qi       
##  3 BFS    Compas… Movies   1. Find…   206.   170.   245.   0.95 mean   qi       
##  4 BFS    Compas… Movies   2. Retr…   207.   171.   248.   0.95 mean   qi       
##  5 BFS    Dziban  Birdstr… 1. Find…   230.   189.   276.   0.95 mean   qi       
##  6 BFS    Dziban  Birdstr… 2. Retr…   231.   190.   279.   0.95 mean   qi       
##  7 BFS    Dziban  Movies   1. Find…   211.   173.   253.   0.95 mean   qi       
##  8 BFS    Dziban  Movies   2. Retr…   213.   174.   258.   0.95 mean   qi       
##  9 DFS    Compas… Birdstr… 1. Find…   248.   204.   300.   0.95 mean   qi       
## 10 DFS    Compas… Birdstr… 2. Retr…   250.   206.   302.   0.95 mean   qi       
## # … with 22 more rows

Differences Between Conditions

Next, we want to see if there is any significant difference in completion time between the two search algorithms (bfs and dfs), the two oracles (dziban and compassql), and the two participant groups (student and professional).

# These fitted draws are identical to draw_data above (same data, model,
# seed, and re_formula = NA), so reuse them instead of re-running
# add_fitted_draws() over the whole posterior.
predictive_data <- draw_data

Differences Between Oracle

# Posterior difference in expected completion time (dziban - compassql)
# per task; returns a list with $plot and the $intervals table.
oracle_differences <- expected_diff_in_mean_plot(predictive_data, "oracle", "Difference in Mean Completion Time (Seconds)",  "Task", NULL)
## `summarise()` regrouping output by 'oracle', 'task' (override with `.groups` argument)
ggsave(file="oracle_time_differences.png", plot=oracle_differences$plot, path = "../plots/comparisons/time", width = 7, height = 7)

oracle_differences$plot

We can double-check the boundaries of the credible intervals to be sure whether or not the interval contains zero.

# write.csv() hard-codes the separator to "," and ignores a user-supplied
# `sep` (it previously emitted "attempt to set 'sep' ignored"), so the
# spurious sep = "" argument has been removed.
write.csv(oracle_differences$intervals, "../plot_data/comparisons/time/oracle_time_differences.csv", row.names = FALSE)
oracle_differences$intervals
## # A tibble: 8 x 8
## # Groups:   oracle [1]
##   oracle          task          difference .lower .upper .width .point .interval
##   <chr>           <fct>              <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
## 1 dziban - compa… 1. Find Extr…      -6.70  -35.8  20.6    0.95 mean   qi       
## 2 dziban - compa… 2. Retrieve …      -6.73  -36.7  21.3    0.95 mean   qi       
## 3 dziban - compa… 3. Prediction     -20.4  -110.   63.7    0.95 mean   qi       
## 4 dziban - compa… 4. Explorati…     -22.6  -121.   71.5    0.95 mean   qi       
## 5 dziban - compa… 1. Find Extr…      -6.70  -16.3   2.96   0.5  mean   qi       
## 6 dziban - compa… 2. Retrieve …      -6.73  -16.4   2.96   0.5  mean   qi       
## 7 dziban - compa… 3. Prediction     -20.4   -49.4   9.14   0.5  mean   qi       
## 8 dziban - compa… 4. Explorati…     -22.6   -55.3   9.97   0.5  mean   qi

Let’s do the above, but split it by datasets.

# Same oracle contrast as above, but computed separately for each dataset.
oracle_differences_subset_split_by_dataset <- expected_diff_in_mean_plot(predictive_data, "oracle", "Difference in Mean Completion Time (Seconds)",  "Task", "dataset")
## `summarise()` regrouping output by 'oracle', 'task', 'dataset' (override with `.groups` argument)
ggsave(file="split_by_dataset_oracle_time_differences.png", plot=oracle_differences_subset_split_by_dataset$plot, path = "../plots/comparisons/time", width = 7, height = 7)

oracle_differences_subset_split_by_dataset$plot

Check intervals for oracle differences split by dataset.

# Save and display the per-dataset oracle-difference intervals.
write.csv(oracle_differences_subset_split_by_dataset$intervals, "../plot_data/comparisons/time/oracle_time_differences_split_by_dataset.csv", row.names = FALSE)
oracle_differences_subset_split_by_dataset$intervals
## # A tibble: 16 x 9
## # Groups:   oracle, dataset [2]
##    oracle     dataset  task     difference .lower .upper .width .point .interval
##    <chr>      <fct>    <fct>         <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
##  1 dziban - … birdstr… 1. Find…      -6.99  -37.3  21.6    0.95 mean   qi       
##  2 dziban - … birdstr… 2. Retr…      -7.02  -38.3  22.3    0.95 mean   qi       
##  3 dziban - … birdstr… 3. Pred…     -21.3  -115.   66.3    0.95 mean   qi       
##  4 dziban - … birdstr… 4. Expl…     -23.6  -127.   74.7    0.95 mean   qi       
##  5 dziban - … movies   1. Find…      -6.41  -34.4  20.1    0.95 mean   qi       
##  6 dziban - … movies   2. Retr…      -6.44  -34.8  20.3    0.95 mean   qi       
##  7 dziban - … movies   3. Pred…     -19.5  -106.   61.9    0.95 mean   qi       
##  8 dziban - … movies   4. Expl…     -21.7  -114.   69.4    0.95 mean   qi       
##  9 dziban - … birdstr… 1. Find…      -6.99  -17.0   3.05   0.5  mean   qi       
## 10 dziban - … birdstr… 2. Retr…      -7.02  -17.2   3.13   0.5  mean   qi       
## 11 dziban - … birdstr… 3. Pred…     -21.3   -51.6   9.60   0.5  mean   qi       
## 12 dziban - … birdstr… 4. Expl…     -23.6   -57.6  10.6    0.5  mean   qi       
## 13 dziban - … movies   1. Find…      -6.41  -15.6   2.87   0.5  mean   qi       
## 14 dziban - … movies   2. Retr…      -6.44  -15.7   2.82   0.5  mean   qi       
## 15 dziban - … movies   3. Pred…     -19.5   -47.3   8.89   0.5  mean   qi       
## 16 dziban - … movies   4. Expl…     -21.7   -53.1   9.57   0.5  mean   qi

Differences Between Participant Groups

# Posterior difference in expected completion time (student - professional)
# per task; returns a list with $plot and the $intervals table.
participant_group_differences <- expected_diff_in_mean_plot(predictive_data, "participant_group", "Difference in Mean Completion Time (Seconds)",  "Task", NULL)
## `summarise()` regrouping output by 'participant_group', 'task' (override with `.groups` argument)
ggsave(file="participant_group_time_differences.png", plot=participant_group_differences$plot, path = "../plots/comparisons/time", width = 7, height = 7)

participant_group_differences$plot

Let’s do the above, but split it by datasets.

# Same participant-group contrast, computed separately for each dataset.
participant_group_differences_split_by_dataset <- expected_diff_in_mean_plot(predictive_data, "participant_group", "Difference in Mean Completion Time (Seconds)",  "Task", "dataset")
## `summarise()` regrouping output by 'participant_group', 'task', 'dataset' (override with `.groups` argument)
ggsave(file="split_by_dataset_participant_group_time_differences.png", plot=participant_group_differences_split_by_dataset$plot, path = "../plots/comparisons/time", width = 7, height = 7)

participant_group_differences_split_by_dataset$plot

We can double-check the boundaries of the credible intervals to be sure whether or not the interval contains zero.

# write.csv() always writes comma-separated output and ignores a `sep`
# argument (it previously warned "attempt to set 'sep' ignored"), so the
# spurious sep = "" has been removed.
write.csv(participant_group_differences$intervals, "../plot_data/comparisons/time/participant_group_time_differences.csv", row.names = FALSE)
participant_group_differences$intervals
## # A tibble: 8 x 8
## # Groups:   participant_group [1]
##   participant_group   task      difference .lower .upper .width .point .interval
##   <chr>               <fct>          <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
## 1 student - professi… 1. Find …       13.5 -15.2    41.9   0.95 mean   qi       
## 2 student - professi… 2. Retri…       13.7 -15.6    42.8   0.95 mean   qi       
## 3 student - professi… 3. Predi…       41.5 -47.6   130.    0.95 mean   qi       
## 4 student - professi… 4. Explo…       45.9 -51.6   144.    0.95 mean   qi       
## 5 student - professi… 1. Find …       13.5   3.60   23.4   0.5  mean   qi       
## 6 student - professi… 2. Retri…       13.7   3.64   23.6   0.5  mean   qi       
## 7 student - professi… 3. Predi…       41.5  11.1    71.5   0.5  mean   qi       
## 8 student - professi… 4. Explo…       45.9  12.2    79.1   0.5  mean   qi

Summary Plots

Plot all of the posterior draws on one plot.

# Overlay all posterior draws on one plot: one half-eye per task, filled by
# search algorithm, faceted by dataset. alpha is a fixed value, not a data
# mapping -- placing the constant inside aes() routed it through
# scale_alpha (producing a spurious "0.5" legend and a rescaled opacity),
# so it is set directly on the layer instead. The variable is also renamed
# so it no longer shadows the base plot() generic.
summary_plot <- draw_data %>%
  ggplot(aes(
    x = .value,
    y = task,
    fill = search
  )) +
  stat_halfeye(.width = c(.95, .5), alpha = 0.5) +
  labs(x = "Average Completion Time (Seconds)", y = "Task") +
  facet_grid(. ~ dataset)
summary_plot

## Saving 7 x 5 in image

#Code for additional plots (mostly subsets)

# Restrict the comparisons to the two value-lookup tasks and drop unused
# task factor levels.
predictive_data_subset <- predictive_data[predictive_data$task %in% c("1. Find Extremum", "2. Retrieve Value"),]
predictive_data_subset$task <- factor(predictive_data_subset$task)

# Search-algorithm differences (dfs - bfs) on the task subset.
search_differences_subset <- expected_diff_in_mean_plot(predictive_data_subset, "search", "Difference in Mean Completion Time (Seconds)",  "Task", NULL)
## `summarise()` regrouping output by 'search', 'task' (override with `.groups` argument)
ggsave(file="search_time_differences_subset.png", plot=search_differences_subset$plot, path = "../plots/comparisons/time", width = 7, height = 7)

search_differences_subset$plot

# Search differences on the subset, split by dataset.
diff_in_search_prediction_split_by_dataset_subset <- expected_diff_in_mean_plot(predictive_data_subset, "search", "Difference in Mean Completion Time (Seconds)",  "Task", "dataset")
## `summarise()` regrouping output by 'search', 'task', 'dataset' (override with `.groups` argument)
ggsave(file="split_by_dataset_search_time_differences_subset.png", plot=diff_in_search_prediction_split_by_dataset_subset$plot, path = "../plots/comparisons/time", width = 7, height = 7)

diff_in_search_prediction_split_by_dataset_subset$plot

# Oracle differences (dziban - compassql) on the task subset.
oracle_differences_subset <- expected_diff_in_mean_plot(predictive_data_subset, "oracle", "Difference in Mean Completion Time (Seconds)",  "Task", NULL)
## `summarise()` regrouping output by 'oracle', 'task' (override with `.groups` argument)
ggsave(file="oracle_time_differences_subset.png", plot=oracle_differences_subset$plot, path = "../plots/comparisons/time", width = 7, height = 7)

oracle_differences_subset$plot

# Oracle differences on the subset, split by dataset.
oracle_differences_subset_split_by_dataset_subset <- expected_diff_in_mean_plot(predictive_data_subset, "oracle", "Difference in Mean Completion Time (Seconds)",  "Task", "dataset")
## `summarise()` regrouping output by 'oracle', 'task', 'dataset' (override with `.groups` argument)
ggsave(file="split_by_dataset_oracle_time_differences_subset.png", plot=oracle_differences_subset_split_by_dataset_subset$plot, path = "../plots/comparisons/time", width = 7, height = 7)

oracle_differences_subset_split_by_dataset_subset$plot